$ Overlaying the summaries with the raw data
ggplot(data = pf,
aes(x = age, y = friend_count)) +
#coord_cartesian(xlim = c(13,70),ylim(0,1000)) +
xlim(13,70) +
ylim(0,1000) +
geom_point(
alpha = 1/25,
position = position_jitter( height = 0),
color = "orange"
) +
coord_trans(y = 'sqrt') +
geom_line(stat = 'summary', fun.y = mean) +
geom_line(
stat = 'summary',
fun.y = quantile,
fun.args = list(probs = .9),
color = "blue",
linetype = 2
) +
geom_line(
stat = 'summary',
fun.y = quantile,
fun.args = list(probs = .1),
color = "blue",
linetype = 2
) +
geom_line(
stat = 'summary',
fun.y = quantile,
fun.args = list(probs = .5),
color = "blue"
)

Correlation : The correlation coefficient of two variables in a data set equals to their covariance divided by the product of their individual standard deviations. It is a normalized measurement of how the two are linearly related.
cor.test(
pf$age,
pf$friend_count,
method = "pearson",
alternative = "two.sided",
conf.level = 0.95
)
Pearson's product-moment correlation
data: pf$age and pf$friend_count
t = -8.6268, df = 99001, p-value < 2.2e-16
alternative hypothesis: true correlation is not equal to 0
95 percent confidence interval:
-0.03363072 -0.02118189
sample estimates:
cor
-0.02740737
# Alternate way
with(pf,cor.test(age,friend_count, method = "pearson"))
Pearson's product-moment correlation
data: age and friend_count
t = -8.6268, df = 99001, p-value < 2.2e-16
alternative hypothesis: true correlation is not equal to 0
95 percent confidence interval:
-0.03363072 -0.02118189
sample estimates:
cor
-0.02740737
Does not look linear relationship. This may be influence by older age. Lets get the same things to restrict the data to 70 yeas age
with(filter(pf,age <= 70),cor.test(age,friend_count,method = "pearson"))
Pearson's product-moment correlation
data: age and friend_count
t = -52.592, df = 91029, p-value < 2.2e-16
alternative hypothesis: true correlation is not equal to 0
95 percent confidence interval:
-0.1780220 -0.1654129
sample estimates:
cor
-0.1717245
by scatter plot looks like it is monotonuous relationship between age and friends count. person method does not work well with monotonour relationship hence, lets take spearman which is very much supportive to monotonous relationship. Wiki link for monotonous function - https://en.wikipedia.org/wiki/Monotonic_function
with(filter(pf,age <= 70),cor.test(age,friend_count,method = "spearman"))
Cannot compute exact p-value with ties
Spearman's rank correlation rho
data: age and friend_count
S = 1.5782e+14, p-value < 2.2e-16
alternative hypothesis: true rho is not equal to 0
sample estimates:
rho
-0.2552934
rho- -0.2552934 improved over pearson method
Pearson r correlation: measure the degree of the relationship between linearly related variables.
Spearman rank correlation: Spearman rank correlation is a non-parametric test that is used to measure the degree of association between two variables.Spearman rank correlation test does not assume any assumptions about the distribution of the data.
Kendall rank correlation: Kendall rank correlation is a non-parametric test that measures the strength of dependence between two variables.
understand the correlation between www_likes_received and like received
ggplot(data = pf, aes(www_likes_received,likes_received)) +
geom_point(color="black", alpha = 1/20, position = position_jitter(height = 0)) +
xlim(0,quantile(pf$www_likes_received,0.95)) +
ylim(0,quantile(pf$likes_received,0.95)) +
geom_smooth(method = 'lm',color='red')

Correlation test between these two variables
with(pf,cor.test(www_likes_received,likes_received))
Pearson's product-moment correlation
data: www_likes_received and likes_received
t = 937.1, df = 99001, p-value < 2.2e-16
alternative hypothesis: true correlation is not equal to 0
95 percent confidence interval:
0.9473553 0.9486176
sample estimates:
cor
0.9479902
library(alr3)
l.Mitchell <- Mitchell
ggplot(data = l.Mitchell, aes(x = Month, y = Temp)) +
geom_point()

with(l.Mitchell, cor.test(Month, Temp))
Pearson's product-moment correlation
data: Month and Temp
t = 0.81816, df = 202, p-value = 0.4142
alternative hypothesis: true correlation is not equal to 0
95 percent confidence interval:
-0.08053637 0.19331562
sample estimates:
cor
0.05747063
Quite week replationship
Making Sense Of Data by breaking x axis to 12 months
ggplot(data = l.Mitchell, aes(x = Month, y = Temp)) +
geom_point() +
scale_x_continuous(breaks = seq(0,203,12))

There are other measures of associations that can detect this. The dcor.ttest() function in the energy package implements a non-parametric test of the independence of two variables.
library(energy)
with(l.Mitchell, dcor.ttest(Month,Temp))
dcor t-test of independence
data: Month and Temp
T = -0.93904, df = 20501, p-value = 0.8261
sample estimates:
Bias corrected dcor
-0.006558215
pf$age_with_months <- pf$age + ((12 - pf$dob_month)/12)
suppressMessages(library(dplyr))
pf.fc_by_age_months <- pf %>%
group_by(age_with_months) %>%
summarise(friend_count_mean = mean(friend_count),
friend_count_median = median(friend_count),
n = n()) %>%
arrange(age_with_months)
p1 <- ggplot(data = filter(pf.fc_by_age_months,age_with_months < 71),
aes(x = age_with_months,y = friend_count_mean)) +
geom_line() +
geom_smooth()
p2 <- ggplot(data = filter(pf.fc_by_age,age <71),
aes(x = age, y = fc_mean)) +
geom_line() +
geom_smooth()
suppressMessages(library(gridExtra))
grid.arrange(p1,p2,ncol = 1)

LS0tCnRpdGxlOiAiRXhwbG9yYXRvcnkgRGF0YSBBbmFseXNpcyBUd28gVmFyaWFibGVzIgpvdXRwdXQ6IGh0bWxfbm90ZWJvb2sKLS0tCiMjIHJlYWQgdGhlIHNhbXBsZSBkYXRhIGZpbGUKYGBge3J9CnBmIDwtIHJlYWQuZGVsaW0oImh0dHBzOi8vczMuYW1hem9uYXdzLmNvbS91ZGFjaXR5LWhvc3RlZC1kb3dubG9hZHMvdWQ2NTEvcHNldWRvX2ZhY2Vib29rLnRzdiIsc2VwICA9ICdcdCcpCmBgYAoKIyMgc2NhdHRlciBwbG90IGJldHdlZW4gYWdlIGFuZCBmcmllbmQgY291bnRzCmBgYHtyfQpsaWJyYXJ5KGdncGxvdDIpCmdncGxvdCggZGF0YSA9IHBmKSArCiAgZ2VvbV9wb2ludChhZXMoeCA9IGFnZSwgeSA9IGZyaWVuZF9jb3VudCksIGFscGhhID0gMS8yMCkgKwogIHhsaW0oMTMsOTApICAjIFBlb3BsZSBOb3JtYWxseSBhZ2Ugb24gZmFjZWJvb2sKCnN1bW1hcnkocGYkYWdlKQpgYGAKRmFjZWJvb2sgeW91bmcgdXNlcnMgaGF2ZSBtcm9lIGZyaWVuZCB0aGV5IG9uIGF2ZXJhZ2UgaGF2ZSAxMDAwIGZyaWVuZHMgZXhjZXB0IGZldyBhcmUgYmluIHRoZSByYW5nZSBvZiAxMDAwIHRvIDUwMDAuIFRoZXJlIGlzIHNwaWtlIGFyb3VuZCBhZ2UgNjkgd2hpY2ggaXMgcXVpdGUgbWF0Y2hpbmcgd2l0aCB5b3VuZyBhZ2UgdXNlcnMuIENoYW5jZXMgYXJlIHF1aXRlIGhpZ2ggdGhhdCB5b3VuZyB1c2VycyBhcmUgdXNpbmcgYWdlIDY5IHdpdGggdGhlIHBvc3NpYmxpdHkgdGhleSBkb24ndCB3YW50IHRvIHNoYXJlIGFnZSBpbmZvLiBQb3NzaWJpbHR5IG9mIGZha2UgYWdlLgoKIyMgQWRkaW5nIHNxdWFyZSByb290IGZ1bmN0aW9uIG9mIGZyaWVuZCBjb3VudCAoeSAtYXhpcykKYGBge3J9CmdncGxvdCggZGF0YSA9IHBmKSArCiAgZ2VvbV9wb2ludChhZXMoeCA9IGFnZSwgeSA9IGZyaWVuZF9jb3VudCksIGFscGhhID0gMS8yMCwgcG9zaXRpb24gPSBwb3NpdGlvbl9qaXR0ZXIoIGhlaWdodCA9IDApKSArIAogIHhsaW0oMTMsOTApICsKICBjb29yZF90cmFucyh5ID0gJ3NxcnQnKQpgYGAKCiMjIEV4cGxvcmUgdGhlIHJlbGF0aW9uc2hpcCBiZXR3ZWVuIGZyaWVuZHMgaW5pdGlhdGVkIGFuZCBhZ2UKYGBge3J9CmdncGxvdChkYXRhID0gcGYpICsKICBnZW9tX3BvaW50KGFlcyh4ID0gYWdlLCB5ID0gZnJpZW5kc2hpcHNfaW5pdGlhdGVkKSxhbHBoYSA9IDEvMjUscG9zaXRpb24gPSBwb3NpdGlvbl9qaXR0ZXIoIGhlaWdodCA9IDApKSArCiAgeGxpbSgxMyw5MCkgKwogIGNvb3JkX3RyYW5zKHkgPSAnc3FydCcpCmBgYAoKIyMgQ29uZGl0aW9uYWwgbWVhbgpgYGB7cn0KbGlicmFyeShkcGx5cikKcGYuZmNfYnlfYWdlIDwtIHBmICU+JQogIGdyb3VwX2J5KGFnZSkgJT4lCiAgc3VtbWFyaXNlKGZjX21lYW4gPSBtZWFuKGZyaWVuZF9jb3VudCksCiAgICAgICAgICAgIGZjX21lZGlhbiA9IG1lZGlhbihmcmllbmRfY291bnQpLAogICAgICAgICAgICBuID0gbigpKSAlPiUKICBhcnJhbmdlKGFnZSkKCmhlYWQocGYuZmNfYnlfYWdlKQpgYGAKIyMgUGxvdCBtZWFuIGZyaWVuZCBjb3VudCB2cy4gYWdlIHVzaW5nIGEgbGluZSBncmFwaApgYGB7cn0KZ2dwbG90KGRhdGEgPSBwZi5mY19ieV9hZ2UpICsKICBnZW9tX2xpbmUoYWVzKHggPSBhZ2UsIHkgPSBmY19tZWFuKSkKYGBgCgojJCBPdmVybGF5aW5nIHRoZSBzdW1tYXJpZXMgd2l0aCB0aGUgcmF3IGRhdGEKYGBge3J9CmdncGxvdChkYXRhID0gcGYsCiAgICAgICBhZXMoeCA9IGFnZSwgeSA9IGZyaWVuZF9jb3VudCkpICsKICAjY29vcmRfY2FydGVzaWFuKHhsaW0gPSBjKDEzLDcwKSx5bGltKDAsMTAwMCkpICsKICB4bGltKDEzLDcwKSArCiAgeWxpbSgwLDEwMDApICsKICBnZW9tX3BvaW50KAogICAgYWxwaGEgPSAxLzI1LAogICAgcG9zaXRpb24gPSBwb3NpdGlvbl9qaXR0ZXIoIGhlaWdodCA9IDApLAogICAgY29sb3IgPSAib3JhbmdlIgogICAgKSArCiAgY29vcmRfdHJhbnMoeSA9ICdzcXJ0JykgKwogIGdlb21fbGluZShzdGF0ID0gJ3N1bW1hcnknLCBmdW4ueSA9IG1lYW4pICsKICBnZW9tX2xpbmUoCiAgICBzdGF0ID0gJ3N1bW1hcnknLCAKICAgIGZ1bi55ID0gcXVhbnRpbGUsCiAgICBmdW4uYXJncyA9IGxpc3QocHJvYnMgPSAuOSksCiAgICBjb2xvciA9ICJibHVlIiwKICAgIGxpbmV0eXBlID0gMgogICAgKSArIAogICBnZW9tX2xpbmUoCiAgICBzdGF0ID0gJ3N1bW1hcnknLCAKICAgIGZ1bi55ID0gcXVhbnRpbGUsCiAgICBmdW4uYXJncyA9IGxpc3QocHJvYnMgPSAuMSksCiAgICBjb2xvciA9ICJibHVlIiwKICAgIGxpbmV0eXBlID0gMgogICAgKSArIAogIGdlb21fbGluZSgKICAgIHN0YXQgPSAnc3VtbWFyeScsIAogICAgZnVuLnkgPSBxdWFudGlsZSwKICAgIGZ1bi5hcmdzID0gbGlzdChwcm9icyA9IC41KSwKICAgIGNvbG9yID0gImJsdWUiCiAgICApIApgYGAKIyMgQ29ycmVsYXRpb24gOiBUaGUgY29ycmVsYXRpb24gY29lZmZpY2llbnQgb2YgdHdvIHZhcmlhYmxlcyBpbiBhIGRhdGEgc2V0IGVxdWFscyB0byB0aGVpciBjb3ZhcmlhbmNlIGRpdmlkZWQgYnkgdGhlIHByb2R1Y3Qgb2YgdGhlaXIgaW5kaXZpZHVhbCBzdGFuZGFyZCBkZXZpYXRpb25zLiBJdCBpcyBhIG5vcm1hbGl6ZWQgbWVhc3VyZW1lbnQgb2YgaG93IHRoZSB0d28gYXJlIGxpbmVhcmx5IHJlbGF0ZWQuCgpgYGB7cn0KY29yLnRlc3QoCiAgcGYkYWdlLAogIHBmJGZyaWVuZF9jb3VudCwKICBtZXRob2QgPSAicGVhcnNvbiIsCiAgYWx0ZXJuYXRpdmUgPSAidHdvLnNpZGVkIiwKICBjb25mLmxldmVsID0gMC45NQogICkKCiMgQWx0ZXJuYXRlIHdheQp3aXRoKHBmLGNvci50ZXN0KGFnZSxmcmllbmRfY291bnQsIG1ldGhvZCA9ICJwZWFyc29uIikpICAKYGBgCkRvZXMgbm90IGxvb2sgbGluZWFyIHJlbGF0aW9uc2hpcC4gVGhpcyBtYXkgYmUgaW5mbHVlbmNlIGJ5IG9sZGVyIGFnZS4gTGV0cyBnZXQgdGhlIHNhbWUgdGhpbmdzIHRvIHJlc3RyaWN0IHRoZSBkYXRhIHRvIDcwIHllYXMgYWdlCgpgYGB7cn0Kd2l0aChmaWx0ZXIocGYsYWdlIDw9IDcwKSxjb3IudGVzdChhZ2UsZnJpZW5kX2NvdW50LG1ldGhvZCA9ICJwZWFyc29uIikpCmBgYApieSBzY2F0dGVyIHBsb3QgbG9va3MgbGlrZSBpdCBpcyBtb25vdG9udW91cyByZWxhdGlvbnNoaXAgYmV0d2VlbiBhZ2UgYW5kIGZyaWVuZHMgY291bnQuIHBlcnNvbiBtZXRob2QgZG9lcyBub3Qgd29yayB3ZWxsIHdpdGggbW9ub3Rvbm91ciByZWxhdGlvbnNoaXAgaGVuY2UsIGxldHMgdGFrZSBzcGVhcm1hbiB3aGljaCBpcyB2ZXJ5IG11Y2ggc3VwcG9ydGl2ZSB0byBtb25vdG9ub3VzIHJlbGF0aW9uc2hpcC4gV2lraSBsaW5rIGZvciBtb25vdG9ub3VzIGZ1bmN0aW9uIC0gaHR0cHM6Ly9lbi53aWtpcGVkaWEub3JnL3dpa2kvTW9ub3RvbmljX2Z1bmN0aW9uCgpgYGB7cn0Kd2l0aChmaWx0ZXIocGYsYWdlIDw9IDcwKSxjb3IudGVzdChhZ2UsZnJpZW5kX2NvdW50LG1ldGhvZCA9ICJzcGVhcm1hbiIpKQpgYGAKcmhvLSAtMC4yNTUyOTM0ICBpbXByb3ZlZCBvdmVyIHBlYXJzb24gbWV0aG9kCgpQZWFyc29uIHIgY29ycmVsYXRpb246IG1lYXN1cmUgdGhlIGRlZ3JlZSBvZiB0aGUgcmVsYXRpb25zaGlwIGJldHdlZW4gbGluZWFybHkgcmVsYXRlZCB2YXJpYWJsZXMuCgpTcGVhcm1hbiByYW5rIGNvcnJlbGF0aW9uOiBTcGVhcm1hbiByYW5rIGNvcnJlbGF0aW9uIGlzIGEgbm9uLXBhcmFtZXRyaWMgdGVzdCB0aGF0IGlzIHVzZWQgdG8gbWVhc3VyZSB0aGUgZGVncmVlIG9mIGFzc29jaWF0aW9uIGJldHdlZW4gdHdvIHZhcmlhYmxlcy5TcGVhcm1hbiByYW5rIGNvcnJlbGF0aW9uIHRlc3QgZG9lcyBub3QgYXNzdW1lIGFueSBhc3N1bXB0aW9ucyBhYm91dCB0aGUgZGlzdHJpYnV0aW9uIG9mIHRoZSBkYXRhLgoKS2VuZGFsbCByYW5rIGNvcnJlbGF0aW9uOiBLZW5kYWxsIHJhbmsgY29ycmVsYXRpb24gaXMgYSBub24tcGFyYW1ldHJpYyB0ZXN0IHRoYXQgbWVhc3VyZXMgdGhlIHN0cmVuZ3RoIG9mIGRlcGVuZGVuY2UgYmV0d2VlbiB0d28gdmFyaWFibGVzLiAKCiMjIHVuZGVyc3RhbmQgdGhlIGNvcnJlbGF0aW9uIGJldHdlZW4gd3d3X2xpa2VzX3JlY2VpdmVkIGFuZCBsaWtlIHJlY2VpdmVkCmBgYHtyfQpnZ3Bsb3QoZGF0YSA9ICBwZiwgYWVzKHd3d19saWtlc19yZWNlaXZlZCxsaWtlc19yZWNlaXZlZCkpICsKICBnZW9tX3BvaW50KGNvbG9yPSJibGFjayIsIGFscGhhID0gMS8yMCwgcG9zaXRpb24gPSBwb3NpdGlvbl9qaXR0ZXIoaGVpZ2h0ID0gMCkpICsKICB4bGltKDAscXVhbnRpbGUocGYkd3d3X2xpa2VzX3JlY2VpdmVkLDAuOTUpKSArCiAgeWxpbSgwLHF1YW50aWxlKHBmJGxpa2VzX3JlY2VpdmVkLDAuOTUpKSAgKwogIGdlb21fc21vb3RoKG1ldGhvZCA9ICdsbScsY29sb3I9J3JlZCcpCgpgYGAKCiMjIENvcnJlbGF0aW9uIHRlc3QgYmV0d2VlbiB0aGVzZSB0d28gdmFyaWFibGVzCmBgYHtyfQp3aXRoKHBmLGNvci50ZXN0KHd3d19saWtlc19yZWNlaXZlZCxsaWtlc19yZWNlaXZlZCkpCmBgYApgYGB7cn0KbGlicmFyeShhbHIzKQpsLk1pdGNoZWxsIDwtIE1pdGNoZWxsCgpnZ3Bsb3QoZGF0YSA9IGwuTWl0Y2hlbGwsIGFlcyh4ID0gTW9udGgsIHkgPSBUZW1wKSkgKwogIGdlb21fcG9pbnQoKQoKd2l0aChsLk1pdGNoZWxsLCBjb3IudGVzdChNb250aCwgVGVtcCkpCmBgYApRdWl0ZSB3ZWVrIHJlcGxhdGlvbnNoaXAKCiMjIE1ha2luZyBTZW5zZSBPZiBEYXRhIGJ5IGJyZWFraW5nIHggYXhpcyB0byAxMiBtb250aHMKYGBge3J9CmdncGxvdChkYXRhID0gbC5NaXRjaGVsbCwgYWVzKHggPSBNb250aCwgeSA9IFRlbXApKSArCiAgZ2VvbV9wb2ludCgpICsKICBzY2FsZV94X2NvbnRpbnVvdXMoYnJlYWtzID0gc2VxKDAsMjAzLDEyKSkgIyBzaW5jZSBkaXNjZWF0ZSBpcyBub3Qgd29ya2luZwoKYGBgClRoZXJlIGFyZSBvdGhlciBtZWFzdXJlcyBvZiBhc3NvY2lhdGlvbnMgdGhhdCBjYW4gZGV0ZWN0IHRoaXMuIFRoZSBkY29yLnR0ZXN0KCkgZnVuY3Rpb24gaW4gdGhlIGVuZXJneSBwYWNrYWdlIGltcGxlbWVudHMgYSBub24tcGFyYW1ldHJpYyB0ZXN0IG9mIHRoZSBpbmRlcGVuZGVuY2Ugb2YgdHdvIHZhcmlhYmxlcy4KCmBgYHtyfQpsaWJyYXJ5KGVuZXJneSkKd2l0aChsLk1pdGNoZWxsLCBkY29yLnR0ZXN0KE1vbnRoLFRlbXApKQpgYGAKYGBge3J9CnBmJGFnZV93aXRoX21vbnRocyAgPC0gcGYkYWdlICsgKCgxMiAtIHBmJGRvYl9tb250aCkvMTIpCmBgYApgYGB7cn0Kc3VwcHJlc3NNZXNzYWdlcyhsaWJyYXJ5KGRwbHlyKSkKcGYuZmNfYnlfYWdlX21vbnRocyA8LSBwZiAlPiUKICBncm91cF9ieShhZ2Vfd2l0aF9tb250aHMpICU+JQogIHN1bW1hcmlzZShmcmllbmRfY291bnRfbWVhbiA9IG1lYW4oZnJpZW5kX2NvdW50KSwKICAgICAgICAgICAgZnJpZW5kX2NvdW50X21lZGlhbiA9IG1lZGlhbihmcmllbmRfY291bnQpLAogICAgICAgICAgICBuID0gbigpKSAlPiUKICBhcnJhbmdlKGFnZV93aXRoX21vbnRocykKYGBgCgoKYGBge3J9CnAxIDwtIGdncGxvdChkYXRhID0gZmlsdGVyKHBmLmZjX2J5X2FnZV9tb250aHMsYWdlX3dpdGhfbW9udGhzIDwgNzEpLAogICAgICAgYWVzKHggPSBhZ2Vfd2l0aF9tb250aHMseSA9IGZyaWVuZF9jb3VudF9tZWFuKSkgKwogIGdlb21fbGluZSgpICsKICBnZW9tX3Ntb290aCgpCgpwMiA8LSBnZ3Bsb3QoZGF0YSA9IGZpbHRlcihwZi5mY19ieV9hZ2UsYWdlIDw3MSksCiAgICAgICAgICAgICBhZXMoeCA9IGFnZSwgeSA9IGZjX21lYW4pKSArCiAgZ2VvbV9saW5lKCkgKwogIGdlb21fc21vb3RoKCkKCnN1cHByZXNzTWVzc2FnZXMobGlicmFyeShncmlkRXh0cmEpKQpncmlkLmFycmFuZ2UocDEscDIsbmNvbCA9IDEpCmBgYAoK